# -*- coding: utf-8 -*-

from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

from ..items import MyItemLoader, Ch03Item


class Quotes03Spider(CrawlSpider):
    """
    Demonstrates horizontal and vertical crawling with CrawlSpider.

    Run with: scrapy crawl quotes03
    """
    name = 'quotes03'
    allowed_domains = ['localhost']
    start_urls = ['http://localhost:8000/']
    rules = (
        # Links matching the 'page/' pattern are followed, and every response
        # fetched through this rule is handed to parse_item.
        # NOTE(review): the pattern is unanchored, so any URL containing
        # 'page/' qualifies -- confirm that is the intended scope.
        Rule(
            LinkExtractor(allow=r'page/'),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_item(self, response):
        """
        Yield one loaded item for every quote node found in ``response``.

        Each item is built by a MyItemLoader wrapping a fresh Ch03Item and
        scoped to the quote node, so the field XPaths below are relative.
        """
        # (item field, XPath relative to a single quote node); the order
        # here is the order in which values are added to the loader.
        field_xpaths = (
            ('text', './span[@class="text"]/text()'),
            ('author', './span/small[@class="author"]/text()'),
            ('tags', './div[@class="tags"]/a/text()'),
        )
        quote_nodes = response.xpath(
            '//div[@class="col-md-8"]/div[@class="quote"]'
        )

        for node in quote_nodes:
            # Bind the loader to this node's selector
            loader = MyItemLoader(item=Ch03Item(), selector=node)
            for field, xpath in field_xpaths:
                loader.add_xpath(field, xpath)

            yield loader.load_item()
